The dataset contains 338 columns and 1002 rows; each row holds one individual's choices together with attributes such as age, education and sex. Because my task is restricted to a few factors, I will not use the whole dataset. Instead I create a subset of the original dataset that keeps only the columns needed for the analysis.
# Keep only the columns used in this analysis (selected by position) and
# preview the first rows of the reduced data frame.
keep_cols <- c(1:3, 5, 6, 9, 11, 128:137, 338)
Lisbon_data2 <- Lisbon_data[, keep_cols]
head(Lisbon_data2)
## qb q1a q1b q4 q5 q8 q9b sex marital age class region area work indust
## 1 2 3 NA 3 -99.99 10 2 3 2 3 4 3 3 4 -99.99
## 2 2 2 3 2 3.00 7 2 2 2 5 6 3 3 3 5.00
## 3 2 3 NA 2 3.00 7 3 2 3 3 6 3 3 5 -99.99
## 4 2 2 2 2 3.00 7 3 2 2 5 2 3 3 3 13.00
## 5 2 2 3 2 3.00 7 3 2 2 5 4 3 3 4 -99.99
## 6 2 3 NA 2 3.00 8 4 2 3 4 5 3 3 6 -99.99
## sector tea exage
## 1 3.00 3 23
## 2 3.00 3 47
## 3 -99.99 3 22
## 4 3.00 3 38
## 5 3.00 3 44
## 6 -99.99 3 25
Next I rename the columns of the dataset so that it is clearer what each one represents, which makes the results easier to interpret.
# Descriptive column names, listed in the same order as the selected columns.
descriptive_names <- c(
  "Registered_Voter", "oct_vote", "oct_type", "June_vote", "June_type",
  "EU_Knowledge", "Switzerland_Member", "Sex", "Marital", "Age_type",
  "Class", "Region", "Area_type", "Working_status", "Industry_type",
  "Working_sector", "Education_status", "Age"
)
names(Lisbon_data2) <- descriptive_names
# Preview the first ten rows of the first five renamed columns.
Lisbon_data2[1:10, 1:5]
## Registered_Voter oct_vote oct_type June_vote June_type
## 1 2 3 NA 3 -99.99
## 2 2 2 3 2 3.00
## 3 2 3 NA 2 3.00
## 4 2 2 2 2 3.00
## 5 2 2 3 2 3.00
## 6 2 3 NA 2 3.00
## 7 2 2 2 2 2.00
## 8 2 2 2 2 3.00
## 9 2 2 2 2 2.00
## 10 2 2 2 2 2.00
# Group the EU knowledge score into three bands:
#   Poor (< 4), Average (4 to 7 inclusive) and Good (> 7).
# The score is read into a numeric copy first so that every comparison is
# numeric. Assigning "Good" straight into the column would coerce it to
# character, making the later `<= 7` / `< 4` tests lexicographic and
# locale-dependent. Missing scores stay NA.
eu_score <- Lisbon_data2$EU_Knowledge
Lisbon_data2$EU_Knowledge <- ifelse(
  eu_score > 7, "Good",
  ifelse(eu_score >= 4, "Average", "Poor")
)
# Every variable except Age is categorical, so convert those columns to
# factors in a single pass.
categorical_cols <- c(
  "Registered_Voter", "oct_vote", "oct_type", "June_vote", "June_type",
  "EU_Knowledge", "Switzerland_Member", "Sex", "Marital", "Age_type",
  "Class", "Region", "Area_type", "Working_status", "Industry_type",
  "Working_sector", "Education_status"
)
Lisbon_data2[categorical_cols] <- lapply(Lisbon_data2[categorical_cols], as.factor)
# The categories are stored as numeric codes. To interpret the results, replace
# the codes with readable labels. Labels are assigned by position, i.e. the
# first label goes to the smallest code present in the column, and so on.
# NOTE(review): June_type contains the code -99.99 (see the preview above), which
# sorts first and therefore appears to receive the label "Favour" - confirm
# the intended mapping against the survey codebook.
category_labels <- list(
  Registered_Voter = c("Yes", "No", "Unknown"),
  oct_vote = c("Vote", "Didn't Vote", "Unknown"),
  oct_type = c("Favour", "Against", "Didn't Vote"),
  June_vote = c("Vote", "Didn't Vote", "Unknown"),
  June_type = c("Favour", "Against", "Didn't Vote"),
  Switzerland_Member = c("True", "False", "Don't Know"),
  Sex = c("Male", "Female"),
  Marital = c("Married", "Single", "Other"),
  Age_type = c("18-21", "22-24", "25-34", "35-49", "50-64", "65+"),
  Class = c("Administrative", "Supervisory", "Skilled Manual",
            "Unskilled manual", "Farmers(>50 acre land)",
            "Farmers(<50 acre land)"),
  Region = c("Dublin", "Leinster", "Munster", "Ulster"),
  Area_type = c("Urban", "Rural"),
  Working_status = c("Housewife", "Self_Employed", "Employee",
                     "Unemployed(Job Searching)",
                     "Unemployed(Not searching jobs)",
                     "Student", "Retired"),
  Industry_type = c("Unknown", "Construction", "Computer/IT",
                    "Finance", "Agriculture", "Food Production",
                    "Government", "Leisure", "Manufacturing",
                    "Media", "Profesionals", "Retail",
                    "Other Services", "Other"),
  Working_sector = c("Unknown", "Public", "Private"),
  Education_status = c("Primary", "Secondary", "Third level",
                       "School/Colllege")
)
for (col in names(category_labels)) {
  levels(Lisbon_data2[[col]]) <- category_labels[[col]]
}
# Respondents with a missing oct_type are recorded as "Didn't Vote".
missing_oct <- is.na(Lisbon_data2$oct_type)
Lisbon_data2$oct_type[missing_oct] <- "Didn't Vote"
summary(Lisbon_data2)
## Registered_Voter oct_vote oct_type June_vote
## Yes :964 Vote :756 Favour :485 Vote :724
## No : 19 Didn't Vote:241 Against :271 Didn't Vote:261
## Unknown: 19 Unknown : 5 Didn't Vote:246 Unknown : 17
##
##
##
##
## June_type EU_Knowledge Switzerland_Member Sex Marital
## Favour :278 Average:572 True :243 Male :492 Married:591
## Against :311 Good :385 False :524 Female:510 Single :318
## Didn't Vote:413 Poor : 45 Don't Know:235 Other : 93
##
##
##
##
## Age_type Class Region Area_type
## 18-21: 63 Administrative :134 Dublin :290 Urban:624
## 22-24: 70 Supervisory :311 Leinster:248 Rural:378
## 25-34:213 Skilled Manual :236 Munster :288
## 35-49:299 Unskilled manual :260 Ulster :176
## 50-64:225 Farmers(>50 acre land): 43
## 65+ :132 Farmers(<50 acre land): 18
##
## Working_status Industry_type Working_sector
## Housewife :188 Unknown :537 Unknown:447
## Self_Employed :102 Other Services:121 Public :151
## Employee :453 Government :103 Private:404
## Unemployed(Job Searching) : 66 Construction : 51
## Unemployed(Not searching jobs): 30 Agriculture : 39
## Student : 59 Finance : 35
## Retired :104 (Other) :116
## Education_status Age
## Primary : 75 Min. :18.00
## Secondary :588 1st Qu.:30.00
## Third level :280 Median :42.00
## School/Colllege: 59 Mean :43.59
## 3rd Qu.:56.00
## Max. :82.00
##
Q.2
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# NOTE: attach() puts the columns of Lisbon_data2 on the search path. It is kept
# because code later in the script may refer to columns by bare name.
attach(Lisbon_data2)
# Bivariate plots
# Stacked bar chart of oct_vote, filled by oct_type, with the count printed in
# the middle of each segment. after_stat(count) replaces the deprecated
# ..count.. notation.
ggplot(Lisbon_data2, mapping = aes(oct_vote, fill = oct_type)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  ggtitle("Distribution of Votes based on the parameters that and individual voted or not")
# Pie chart of the same split: tabulate oct_type as proportions, then plot.
# The column is read from Lisbon_data2 explicitly instead of relying on an
# attached copy; naming the table argument keeps the `oct_type` column name
# that the plot formula refers to.
oct <- as.data.frame(prop.table(table(oct_type = Lisbon_data2$oct_type)))
plot_ly(oct, labels = ~oct_type, values = ~Freq,
        type = "pie", textposition = "outside", textinfo = "label+percent") %>%
  layout(title = "Voting percentage by considering those who didn't vote")
# Now build the counts for a pie chart that considers only the individuals who
# voted in the referendum. droplevels() removes the now-empty "Didn't Vote"
# level, so the table no longer needs a hard-coded row slice.
oct2 <- droplevels(Lisbon_data2$oct_type[Lisbon_data2$oct_type != "Didn't Vote"])
oct3 <- as.data.frame(table(oct2))
oct3
## oct2 Freq
## 1 Favour 485
## 2 Against 271
# Pie chart of the oct3 counts, labelled with category name and percentage.
voters_pie <- plot_ly(oct3, labels = ~oct2, values = ~Freq, type = "pie",
                      textposition = "outside", textinfo = "label+percent")
layout(voters_pie, title = "Voting percentage after removing those who didn't vote")
Q.3
# Box plot of Age for each oct_type category. stat_summary() prints the
# quantile() values of Age beside each box.
# Changes from the earlier version: the box plot layer is drawn once (it was
# added twice), and `fun` / after_stat(y) replace the deprecated `fun.y` /
# ..y.., which removes the deprecation warning.
ggplot(Lisbon_data2, mapping = aes(oct_type, Age, fill = oct_type)) +
  geom_boxplot(width = 0.8) +
  stat_summary(geom = "text", fun = quantile,
               aes(label = sprintf("%1.1f", after_stat(y)), color = oct_type),
               position = position_nudge(x = 0.5), size = 3.5) +
  ggtitle("Distribution of Age according to the Vote's Given")
# Counts of oct_type by age group, read from Lisbon_data2 explicitly.
with(Lisbon_data2, table(oct_type, Age_type))
## Age_type
## oct_type 18-21 22-24 25-34 35-49 50-64 65+
## Favour 19 23 93 147 127 76
## Against 12 13 56 94 65 31
## Didn't Vote 32 34 64 58 33 25
# Bar charts faceted by age group. after_stat(count) replaces the deprecated
# ..count.. notation throughout.

# Turnout (oct_vote) within each age group, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_vote, fill = Age_type)) +
  geom_bar() +
  facet_grid(~Age_type) +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Distribution of individual who voted or not based on age group")
# Voting outcome (oct_type) within each age group, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = Age_type)) +
  geom_bar() +
  facet_grid(~Age_type) +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on the age group")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = Age_type)) +
  geom_bar() +
  facet_grid(~Age_type) +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on the age group")
# Keep only the rows whose oct_type is not "Didn't Vote". The factor level
# itself is retained, so it still shows up (with zero counts) in later tables.
cast_vote <- Lisbon_data2$oct_type != "Didn't Vote"
Lisbon_data3 <- Lisbon_data2[cast_vote, ]
head(Lisbon_data3)
## Registered_Voter oct_vote oct_type June_vote June_type EU_Knowledge
## 2 Yes Vote Against Vote Didn't Vote Average
## 4 Yes Vote Favour Vote Didn't Vote Average
## 5 Yes Vote Against Vote Didn't Vote Average
## 7 Yes Vote Favour Vote Against Good
## 8 Yes Vote Favour Vote Didn't Vote Average
## 9 Yes Vote Favour Vote Against Average
## Switzerland_Member Sex Marital Age_type Class Region
## 2 True Male Married 35-49 Farmers(>50 acre land) Leinster
## 4 False Male Married 35-49 Administrative Leinster
## 5 False Male Married 35-49 Skilled Manual Leinster
## 7 Don't Know Female Married 65+ Farmers(>50 acre land) Leinster
## 8 False Female Married 50-64 Unskilled manual Munster
## 9 False Male Other 65+ Unskilled manual Munster
## Area_type Working_status Industry_type Working_sector Education_status Age
## 2 Rural Self_Employed Agriculture Private Secondary 47
## 4 Rural Self_Employed Other Services Private Secondary 38
## 5 Rural Employee Unknown Private Secondary 44
## 7 Rural Housewife Unknown Unknown Primary 76
## 8 Urban Housewife Unknown Unknown Third level 52
## 9 Urban Retired Unknown Unknown Primary 69
# Next, consider only the individuals who voted in the referendum (Lisbon_data3).
# Box plot of Age per oct_type, with the quantile() values printed beside each
# box. The box plot layer is drawn once (it was added twice), and `fun` /
# after_stat(y) replace the deprecated `fun.y` / ..y.., which removes the
# deprecation warning.
ggplot(Lisbon_data3, mapping = aes(oct_type, Age, fill = oct_type)) +
  geom_boxplot(width = 0.8) +
  stat_summary(geom = "text", fun = quantile,
               aes(label = sprintf("%1.1f", after_stat(y)), color = oct_type),
               position = position_nudge(x = 0.5), size = 3.5) +
  ggtitle("Distribution of age of the people who voted in the referendum")
# Voting outcome within each age group, labelled with counts.
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = Age_type)) +
  geom_bar() +
  facet_grid(~Age_type) +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on the age group")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = Age_type)) +
  geom_bar() +
  facet_grid(~Age_type) +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on the age group")
# Counts of voting outcome by age group for the voters-only data.
table(Lisbon_data3[["oct_type"]], Lisbon_data3[["Age_type"]])
##
## 18-21 22-24 25-34 35-49 50-64 65+
## Favour 19 23 93 147 127 76
## Against 12 13 56 94 65 31
## Didn't Vote 0 0 0 0 0 0
# Row proportions: the age-group distribution within each voting outcome.
prop.table(table(Lisbon_data3[["oct_type"]], Lisbon_data3[["Age_type"]]), margin = 1)
##
## 18-21 22-24 25-34 35-49 50-64 65+
## Favour 0.03917526 0.04742268 0.19175258 0.30309278 0.26185567 0.15670103
## Against 0.04428044 0.04797048 0.20664207 0.34686347 0.23985240 0.11439114
## Didn't Vote
# Column proportions: the split of voting outcomes within each age group.
prop.table(table(Lisbon_data3[["oct_type"]], Lisbon_data3[["Age_type"]]), margin = 2)
##
## 18-21 22-24 25-34 35-49 50-64 65+
## Favour 0.6129032 0.6388889 0.6241611 0.6099585 0.6614583 0.7102804
## Against 0.3870968 0.3611111 0.3758389 0.3900415 0.3385417 0.2897196
## Didn't Vote 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
Q.4
# Bar charts faceted by economic class. after_stat(count) replaces the
# deprecated ..count.. notation throughout.

# Turnout (oct_vote) within each class, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_vote, fill = Class)) +
  geom_bar() +
  facet_grid(~Class) +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Distribution of individual who voted or not based on class")
# Voting outcome (oct_type) within each class, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = Class)) +
  geom_bar() +
  facet_grid(~Class) +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on the economic class")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = Class)) +
  geom_bar() +
  facet_grid(~Class) +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on the economic class")
# Counts of voting outcome by class, read from Lisbon_data2 explicitly rather
# than from whichever data frame happens to be attached.
with(Lisbon_data2, table(oct_type, Class))
## Class
## oct_type Administrative Supervisory Skilled Manual Unskilled manual
## Favour 86 169 89 103
## Against 22 79 76 81
## Didn't Vote 26 63 71 76
## Class
## oct_type Farmers(>50 acre land) Farmers(<50 acre land)
## Favour 30 8
## Against 7 6
## Didn't Vote 6 4
# Overall proportions of voting outcome by class, read from Lisbon_data2
# explicitly rather than from whichever data frame happens to be attached.
prop.table(with(Lisbon_data2, table(oct_type, Class)))
## Class
## oct_type Administrative Supervisory Skilled Manual Unskilled manual
## Favour 0.085828343 0.168662675 0.088822355 0.102794411
## Against 0.021956088 0.078842315 0.075848303 0.080838323
## Didn't Vote 0.025948104 0.062874251 0.070858283 0.075848303
## Class
## oct_type Farmers(>50 acre land) Farmers(<50 acre land)
## Favour 0.029940120 0.007984032
## Against 0.006986028 0.005988024
## Didn't Vote 0.005988024 0.003992016
# Consider only those who voted in the referendum (Lisbon_data3).
# after_stat(count) replaces the deprecated ..count.. notation.

# Voting outcome within each class, labelled with counts.
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = Class)) +
  geom_bar() +
  facet_grid(~Class) +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on the economic class")
# Same chart, labelled with each count as a percentage of sum(count). The
# title now says "Percentage" so it can be told apart from the count chart,
# matching the other percentage plots in this report.
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = Class)) +
  geom_bar() +
  facet_grid(~Class) +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on the economic class")
# Counts of voting outcome by class for the voters-only data.
table(Lisbon_data3[["oct_type"]], Lisbon_data3[["Class"]])
##
## Administrative Supervisory Skilled Manual Unskilled manual
## Favour 86 169 89 103
## Against 22 79 76 81
## Didn't Vote 0 0 0 0
##
## Farmers(>50 acre land) Farmers(<50 acre land)
## Favour 30 8
## Against 7 6
## Didn't Vote 0 0
# Overall proportions of voting outcome by class for the voters-only data.
class_counts <- table(Lisbon_data3[["oct_type"]], Lisbon_data3[["Class"]])
prop.table(class_counts)
##
## Administrative Supervisory Skilled Manual Unskilled manual
## Favour 0.113756614 0.223544974 0.117724868 0.136243386
## Against 0.029100529 0.104497354 0.100529101 0.107142857
## Didn't Vote 0.000000000 0.000000000 0.000000000 0.000000000
##
## Farmers(>50 acre land) Farmers(<50 acre land)
## Favour 0.039682540 0.010582011
## Against 0.009259259 0.007936508
## Didn't Vote 0.000000000 0.000000000
Q.5
# Bar charts faceted by education status. after_stat(count) replaces the
# deprecated ..count.. notation throughout.

# Turnout (oct_vote) within each education level, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_vote, fill = Education_status)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~Education_status) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Distribution of individual who voted or not based on Education status")
# Voting outcome (oct_type) within each education level, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = Education_status)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~Education_status) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on Education status")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = Education_status)) +
  geom_bar() +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  facet_grid(~Education_status) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on Education status")
# Put the columns of Lisbon_data3 on the search path. From here on, bare column
# names resolve to Lisbon_data3 and mask the earlier attached Lisbon_data2
# columns (see the masking message printed below).
attach(Lisbon_data3)
## The following objects are masked from Lisbon_data2:
##
## Age, Age_type, Area_type, Class, Education_status, EU_Knowledge,
## Industry_type, June_type, June_vote, Marital, oct_type, oct_vote,
## Region, Registered_Voter, Sex, Switzerland_Member, Working_sector,
## Working_status
# Counts of voting outcome by education status for voters only. The data frame
# is named explicitly so the result does not depend on attach() order.
with(Lisbon_data3, table(oct_type, Education_status))
## Education_status
## oct_type Primary Secondary Third level School/Colllege
## Favour 32 259 174 20
## Against 22 178 57 14
## Didn't Vote 0 0 0 0
# Overall proportions, voters only. The data frame is named explicitly so the
# result does not depend on attach() order.
prop.table(with(Lisbon_data3, table(oct_type, Education_status)))
## Education_status
## oct_type Primary Secondary Third level School/Colllege
## Favour 0.04232804 0.34259259 0.23015873 0.02645503
## Against 0.02910053 0.23544974 0.07539683 0.01851852
## Didn't Vote 0.00000000 0.00000000 0.00000000 0.00000000
# Row proportions (education mix within each voting outcome), voters only.
# The data frame is named explicitly so the result does not depend on attach() order.
prop.table(with(Lisbon_data3, table(oct_type, Education_status)), 1)
## Education_status
## oct_type Primary Secondary Third level School/Colllege
## Favour 0.06597938 0.53402062 0.35876289 0.04123711
## Against 0.08118081 0.65682657 0.21033210 0.05166052
## Didn't Vote
# Column proportions (voting outcome within each education level), voters only.
# The data frame is named explicitly so the result does not depend on attach() order.
prop.table(with(Lisbon_data3, table(oct_type, Education_status)), 2)
## Education_status
## oct_type Primary Secondary Third level School/Colllege
## Favour 0.5925926 0.5926773 0.7532468 0.5882353
## Against 0.4074074 0.4073227 0.2467532 0.4117647
## Didn't Vote 0.0000000 0.0000000 0.0000000 0.0000000
# Voters-only bar charts faceted by education status. after_stat(count)
# replaces the deprecated ..count.. notation.

# Voting outcome within each education level, labelled with counts.
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = Education_status)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~Education_status) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on Education status")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = Education_status)) +
  geom_bar() +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  facet_grid(~Education_status) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on Education status")
Q.6
# Bar charts faceted by EU knowledge band. after_stat(count) replaces the
# deprecated ..count.. notation throughout.

# Turnout (oct_vote) within each knowledge band, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_vote, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~EU_Knowledge) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Distribution of individual who voted or not based EU knowledge")
# Voting outcome (oct_type) within each knowledge band, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~EU_Knowledge) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on EU knowledge of individual")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  facet_grid(~EU_Knowledge) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on EU knowledge of individual")
# Lisbon_data3 is already attached earlier in the script, so it is not attached
# a second time here (that only stacked another copy on the search path and
# produced masking messages).
# Counts of voting outcome by EU knowledge band for voters only, with the data
# frame named explicitly.
with(Lisbon_data3, table(oct_type, EU_Knowledge))
## EU_Knowledge
## oct_type Average Good Poor
## Favour 340 118 27
## Against 149 110 12
## Didn't Vote 0 0 0
# Overall proportions, voters only. The data frame is named explicitly so the
# result does not depend on attach() order.
prop.table(with(Lisbon_data3, table(oct_type, EU_Knowledge)))
## EU_Knowledge
## oct_type Average Good Poor
## Favour 0.44973545 0.15608466 0.03571429
## Against 0.19708995 0.14550265 0.01587302
## Didn't Vote 0.00000000 0.00000000 0.00000000
# Row proportions (knowledge mix within each voting outcome), voters only.
# The data frame is named explicitly so the result does not depend on attach() order.
prop.table(with(Lisbon_data3, table(oct_type, EU_Knowledge)), 1)
## EU_Knowledge
## oct_type Average Good Poor
## Favour 0.70103093 0.24329897 0.05567010
## Against 0.54981550 0.40590406 0.04428044
## Didn't Vote
# Column proportions (voting outcome within each knowledge band), voters only.
# The data frame is named explicitly so the result does not depend on attach() order.
prop.table(with(Lisbon_data3, table(oct_type, EU_Knowledge)), 2)
## EU_Knowledge
## oct_type Average Good Poor
## Favour 0.6952965 0.5175439 0.6923077
## Against 0.3047035 0.4824561 0.3076923
## Didn't Vote 0.0000000 0.0000000 0.0000000
# Voters-only bar charts faceted by EU knowledge band. after_stat(count)
# replaces the deprecated ..count.. notation.

# Voting outcome within each knowledge band, labelled with counts.
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~EU_Knowledge) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on EU Knowledge of individual")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  facet_grid(~EU_Knowledge) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on EU Knowledge of individual")
Q.7
# To analyze whether an individual's EU knowledge is related to their answer to
# the question "Switzerland is a member of the EU", I will use a chi-square test.
# The contingency table is built from Lisbon_data3 (voters only) explicitly, so
# it does not depend on which data frame is currently attached.
with(Lisbon_data3, table(EU_Knowledge, Switzerland_Member))
## Switzerland_Member
## EU_Knowledge True False Don't Know
## Average 122 302 65
## Good 57 95 76
## Poor 14 19 6
# H0: EU knowledge is independent of the answer to "Switzerland is a member of the EU".
# H1: EU knowledge is not independent of that answer.
# The test is evaluated inside Lisbon_data3 (voters only) explicitly, instead of
# picking up whichever data frame was attached last.
# NOTE(review): the plots further down use Lisbon_data2 (all respondents) -
# confirm which sample the test is meant to cover.
with(Lisbon_data3, chisq.test(table(EU_Knowledge, Switzerland_Member)))
##
## Pearson's Chi-squared test
##
## data: table(EU_Knowledge, Switzerland_Member)
## X-squared = 45.905, df = 4, p-value = 2.578e-09
#The p-value is 2.578e-09 < 0.05, so I reject the null hypothesis in favour of the alternative hypothesis.
#That means the EU knowledge of an individual is not independent of their answer to whether Switzerland is a member of the EU; to analyze the dependency I will use some plots.
# Counts of EU knowledge band by Switzerland answer (voters only), with the
# data frame named explicitly instead of relying on attach().
with(Lisbon_data3, table(EU_Knowledge, Switzerland_Member))
## Switzerland_Member
## EU_Knowledge True False Don't Know
## Average 122 302 65
## Good 57 95 76
## Poor 14 19 6
# Overall proportions (voters only), with the data frame named explicitly
# instead of relying on attach().
prop.table(with(Lisbon_data3, table(EU_Knowledge, Switzerland_Member)))
## Switzerland_Member
## EU_Knowledge True False Don't Know
## Average 0.161375661 0.399470899 0.085978836
## Good 0.075396825 0.125661376 0.100529101
## Poor 0.018518519 0.025132275 0.007936508
# Column proportions: knowledge mix within each Switzerland answer (voters
# only), with the data frame named explicitly instead of relying on attach().
prop.table(with(Lisbon_data3, table(EU_Knowledge, Switzerland_Member)), 2)
## Switzerland_Member
## EU_Knowledge True False Don't Know
## Average 0.63212435 0.72596154 0.44217687
## Good 0.29533679 0.22836538 0.51700680
## Poor 0.07253886 0.04567308 0.04081633
# Row proportions: Switzerland answers within each knowledge band (voters
# only), with the data frame named explicitly instead of relying on attach().
prop.table(with(Lisbon_data3, table(EU_Knowledge, Switzerland_Member)), 1)
## Switzerland_Member
## EU_Knowledge True False Don't Know
## Average 0.2494888 0.6175869 0.1329243
## Good 0.2500000 0.4166667 0.3333333
## Poor 0.3589744 0.4871795 0.1538462
# Bar charts relating EU knowledge, the Switzerland answer and voting, drawn
# from Lisbon_data2. after_stat(count) replaces the deprecated ..count..
# notation throughout.

# EU knowledge band within each Switzerland answer, labelled with counts.
ggplot(Lisbon_data2, mapping = aes(EU_Knowledge, fill = Switzerland_Member)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~Switzerland_Member) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Comparison of EU knowledge of an individual with the output of an questionare that is switzerland is a member of EU or not ")
# Same chart, labelled with each count as a percentage of sum(count).
ggplot(Lisbon_data2, mapping = aes(EU_Knowledge, fill = Switzerland_Member)) +
  geom_bar() +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  facet_grid(~Switzerland_Member) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Comparison in percentage of EU knowledge of an individual with the output of an questionare that is switzerland is a member of EU or not")
# Turnout (oct_vote) filled by Switzerland answer, one panel per knowledge band.
ggplot(Lisbon_data2, mapping = aes(oct_vote, fill = Switzerland_Member)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~EU_Knowledge) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Distribution of individual who voted or not based on both EU knowledge and Switzerland membership questionare output")
# Voting outcome filled by knowledge band, one panel per Switzerland answer.
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~Switzerland_Member) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on both EU knowledge and Switzerland membership questionare output")
# Same chart with percentage labels. The title now says "Percentage" so it can
# be told apart from the count chart above.
ggplot(Lisbon_data2, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  facet_grid(~Switzerland_Member) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on both EU knowledge and Switzerland membership questionare output")
# Counts of EU knowledge band by Switzerland answer for the voters-only data.
table(Lisbon_data3[["EU_Knowledge"]], Lisbon_data3[["Switzerland_Member"]])
##
## True False Don't Know
## Average 122 302 65
## Good 57 95 76
## Poor 14 19 6
# Overall proportions of knowledge band by Switzerland answer, voters only.
prop.table(table(Lisbon_data3[["EU_Knowledge"]], Lisbon_data3[["Switzerland_Member"]]))
##
## True False Don't Know
## Average 0.161375661 0.399470899 0.085978836
## Good 0.075396825 0.125661376 0.100529101
## Poor 0.018518519 0.025132275 0.007936508
# Column proportions: knowledge mix within each Switzerland answer, voters only.
prop.table(table(Lisbon_data3[["EU_Knowledge"]], Lisbon_data3[["Switzerland_Member"]]),
           margin = 2)
##
## True False Don't Know
## Average 0.63212435 0.72596154 0.44217687
## Good 0.29533679 0.22836538 0.51700680
## Poor 0.07253886 0.04567308 0.04081633
# Row proportions: Switzerland answers within each knowledge band, voters only.
prop.table(table(Lisbon_data3[["EU_Knowledge"]], Lisbon_data3[["Switzerland_Member"]]),
           margin = 1)
##
## True False Don't Know
## Average 0.2494888 0.6175869 0.1329243
## Good 0.2500000 0.4166667 0.3333333
## Poor 0.3589744 0.4871795 0.1538462
# Voters-only charts: voting outcome filled by knowledge band, one panel per
# Switzerland answer. after_stat(count) replaces the deprecated ..count..
# notation.

# Labelled with counts.
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(count)), stat = "count",
            position = position_stack(0.5)) +
  facet_grid(~Switzerland_Member) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("distribution of voting output based on both EU knowledge and Switzerland membership questionare output")
# Labelled with each count as a percentage of sum(count). The title now says
# "Percentage" so it can be told apart from the count chart above.
ggplot(Lisbon_data3, mapping = aes(oct_type, fill = EU_Knowledge)) +
  geom_bar() +
  geom_text(aes(label = after_stat(round(count / sum(count) * 100, 2))),
            stat = "count", position = position_stack(0.5)) +
  facet_grid(~Switzerland_Member) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Percentage distribution of voting output based on both EU knowledge and Switzerland membership questionare output")